/*****************************************************************************
State of Aadhaar Survey 2017-2018

Title: 2_Data_quality_pub.do
Author: IDinsight
Contact: stateofaadhaar@idinsight.org
Date: 29 August 2018
Data: "SOA2018_nonroster_cleaned_gen.dta", "SOA2018_roster_cleaned_gen.dta"
User-written commands: estout (ssc install estout if not installed)
Description: 	This .do file conducts analysis for the Data Quality section and
				produces output tables in "2_Data_Quality.rtf".

Contents:
	
	1. Analysis using non roster data
		- Survey set up
		- Tabulations / Proportions / Means
		- Regressions / Hypothesis tests
		
	2. Analysis using roster data
		- Survey set up
		- Tabulations / Proportions / Means
		- Regressions / Hypothesis tests
	
Missing data code:
	.r = refused
	.d = don't know	
*****************************************************************************/

	
* Setting up
* Resets the Stata session (open log, data, estimates, global macros) and
* defines the global "dir" that every later cd command is built on.
	
	version 14
	capture log close
	clear all
	mac drop _all
	set more off
	
	* Please replace "..." below with the correct file path on your computer
	* c(os) is "MacOSX", "Unix" or "Windows"; each gets a home-directory style
	* default so that a Unix machine is not handed a Windows drive path.
	if "`c(os)'"=="MacOSX"{
		global dir "/Users/`c(username)'/.../SOA2018_data_release/"
		}
	else if "`c(os)'"=="Unix"{
		global dir "/home/`c(username)'/.../SOA2018_data_release/"
		}
	else{
		global dir "C:/Users/`c(username)'/.../SOA2018_data_release/"
		}

/*****************************************************************************
1. Analysis using non roster data
*****************************************************************************/

	*** Survey set up
	* Loads the non-roster data from Data_sets, declares the survey design, and
	* leaves the working directory at Output_tables so tables are written there.

		cd "${dir}/Data_sets/"
		use "SOA2018_nonroster_cleaned_gen.dta", clear

		* master_key takes over the name hh_id, so the existing hh_id is dropped first
		drop hh_id
		rename master_key hh_id

		* Stages in order: district_id, AC_id, ps_id, hh_id, then the observation (_n);
		* weight_resp_adj is the sampling weight
		svyset district_id [pweight = weight_resp_adj] ///
			|| AC_id ///
			|| ps_id ///
			|| hh_id ///
			|| _n

		cd "${dir}/Output_tables/"
		

	***  Tabulations / Proportions / Means

		/*****************************************************************************
		Q1	You mentioned you have fixed the error in your Aadhaar card: Overall, how easy or difficult did you find the process of fixing the error in your Aadhaar card?
		* Types of analysis: Proportion  aadhaar_etfixease
		* Output: Table 2.1. This call uses replace, so it starts 2_Data_Quality.rtf afresh.
		* Columns: pooled sample, then state == 1, 2, 3 (titled Andhra Pradesh,
		  Rajasthan, West Bengal in that order).
		*****************************************************************************/

			eststo clear
			local outcome aadhaar_etfixease

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state, with the same additions as the pooled column
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				replace ///
				title("Table 2.1 Perceived ease of fixing error in Aadhaar (among respondents who had an error and tried to fix; numbers in percentage)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." ///
				"Respondents were asked: 'Overall, how easy or difficult did you find the process of fixing the error in your Aadhaar card?' and were given the following options to choose from: 'Easy', 'Neutral' and 'Difficult'")

			eststo clear
		
		
		/*****************************************************************************
		Q7	Have you tried to have your address updated to your current address?
		* Types of analysis: Proportion  ad_addressupdate
		* Output: Table 2.2, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome ad_addressupdate

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.2 Percentage of respondents who tried to update the address on their Aadhaar (among those whose current address is different from the one on their Aadhaar)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates.")

			eststo clear
				
			
		/*****************************************************************************
		Q13	Have you tried to have your mobile phone number on your Aadhaar card updated to your current mobile phone number?
		* Types of analysis: Proportion  ad_mobupdate
		* Output: Table 2.3, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome ad_mobupdate

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.3 Percentage of respondents who tried to update the mobile number on their Aadhaar (among those who had changed their mobile number since enrolling)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates.")

			eststo clear
				
				
		/*****************************************************************************
		Q17	Overall, how easy or difficult did you find the process of updating the information (mobile and/or address) of your Aadhaar card?
		* Types of analysis: Proportion  aadhaar_updateease
		* Output: Table 2.4, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome aadhaar_updateease

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.4 Perceived ease of updating information in Aadhaar (among respondents who successfully updated their address or mobile phone number; numbers in percentage)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." ///
				"Respondents were asked: 'Overall, how easy or difficult did you find the process of updating the information (mobile and/or address) of your Aadhaar card?' and were given the following options to choose from: 'Easy', 'Neutral' and 'Difficult'")

			eststo clear
				
			
		/*****************************************************************************
		Q22	Do you have a duplicate Aadhaar?
		* Types of analysis: Proportion  duplicateaadhaar
		* Output: Table 2.5, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			* Every tabulation and count below goes through this one macro, so the
			* pooled column and the state columns always describe the same variable.
			local outcome duplicateaadhaar

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .d | `outcome' == .r)
			estadd scalar missing = r(N)
			* Number of .e values of duplicateaadhaar, stored as "er".
			* (This count used to test aadhaar_updateease, a copy-paste slip.)
			count if (`outcome' == .e)
			estadd scalar er = r(N)

			* One further column per state
			forvalues i = 1/3 {
				display `i'
				eststo: estpost svy: tabulate `outcome' if state == `i', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if (`outcome' == .d | `outcome' == .r) & state == `i'
				estadd scalar missing = r(N)
				count if (`outcome' == .e) & state == `i'
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				compress ///
				collabels(none) ///
				eqlabels(none) ///
				label ///
				modelwidth(0) ///
				incelldelimiter(-) ///
				cells(b(fmt(1)) "cil & ciu") ///
				title ("Table 2.5 Percentage of respondents with duplicate Aadhaar cards (among those who have an Aadhaar)") ///
				nostar ///
				nonumbers ///
				mtitles ("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				nogaps ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." ///
				"Duplicates are defined as cases where the respondent has two cards with the same demographic information and different Aadhaar numbers. Such responses were verified in two ways: 1) enumerators visually inspected the Aadhaar cards and, 2) we called back each of these respondents as back checks to ensure accuracy.") ///
				append

			eststo clear
					
			
		/*****************************************************************************
		Q23	Do you have a duplicate voter ID?
		* Types of analysis: Proportion  duplicatevid
		* Output: Table 2.6, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome duplicatevid

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.6 Percentage of respondents with duplicate voter IDs (among those who have a voter ID)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." ///
				"Duplicates are defined as cases where the respondent has two cards with the same demographic information but different voter ID numbers. Enumerators inspected the voter ID cards to verify such responses.")

			eststo clear
	
	
	
/*****************************************************************************
2. Analysis using roster data
*****************************************************************************/

	*** Survey set up
	* Loads the roster data from Data_sets, declares the survey design, and
	* leaves the working directory at Output_tables so tables are written there.

		cd "${dir}/Data_sets/"
		use "SOA2018_roster_cleaned_gen.dta", clear

		* The design below refers to the household identifier as hh_id
		rename master_key hh_id

		* Stages in order: district_id, AC_id, ps_id, hh_id, then the observation (_n);
		* weight_hh_adj is the sampling weight
		svyset district_id [pweight = weight_hh_adj] ///
			|| AC_id ///
			|| ps_id ///
			|| hh_id ///
			|| _n

		cd "${dir}/Output_tables/"
		

	***  Tabulations / Proportions / Means	

		/*****************************************************************************
		Q1	Does the household member have an error in their Aadhaar Card?
		* Types of analysis: Proportion  aadhaarerror_fm
		* Output: Table 2.7, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome aadhaarerror_fm

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.7 Percentage of residents who had an error in their Aadhaar (among those who have an Aadhaar)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent.")

			eststo clear
				
				
		/*****************************************************************************
		Q2 Did you try to get the error corrected?
		* Types of analysis: Proportion  aadhaar_rectifytry
		* Output: Table 2.8, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome aadhaar_rectifytry

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.8 Percentage of residents who tried to get the error fixed (among those who had an error)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who stated that they have an error in their Aadhaar.")

			eststo clear
			
		/*****************************************************************************
		Q3	Did the error get corrected?
		* Types of analysis: Proportion  correction
		* Output: Table 2.9, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome correction

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.9 Percentage of residents who were successful in getting the error fixed (among those who tried to get the error fixed)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who tried to get the error in their Aadhaar fixed.")

			eststo clear
				
		/*****************************************************************************
		Q4	Did you have to pay to get the error in your Aadhaar card fixed?
		* Types of analysis: Proportion  aadhaar_paymentet
		* Output: Table 2.10, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome aadhaar_paymentet

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.10 Percentage of residents who paid to get the error fixed (among those who got the error fixed successfully)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who were successful in getting the error fixed.")

			eststo clear
				
				
		/*****************************************************************************
		Q5	How much did you have to pay?
		* Types of analysis: Proportion  payscale
		* Output: Table 2.11, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome payscale

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.11 Amount paid for fixing errors, in Rupees (among residents who paid to get the error fixed; numbers in percentage)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who paid to get the error fixed.")

			eststo clear
				
		/*****************************************************************************
		Q6	By the UIDAI guidelines, it should not cost more than 15 rupees to update information such as name, address, etc. Were you aware of this fact?
		* Types of analysis: Proportion  adupdaters_awareness
		* Output: Table 2.12, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome adupdaters_awareness

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.12 Percentage of respondents who are aware that it should cost no more than Rs. 15 for update as per UIDAI regulations (among those who paid to get the error corrected)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "According to the latest UIDAI guidelines, it costs Rs. 25 to update information in one's Aadhaar. However, at the time of update for these household members the cost would have been Rs. 15.")

			eststo clear
			
		/*****************************************************************************
		Q7	What type of error does the Aadhaar card have?
		* Types of analysis: Proportion  aadhaaret_fm
		* Output: Table 2.13.1 (pooled sample), appended to 2_Data_Quality.rtf.
		* Columns: one per variable matched by the aadhaaret_fm_ wildcard, titled
		  Name, Address, Date of birth. NOTE(review): this presumes the wildcard
		  matches exactly three variables in that order; confirm against the data.
		*****************************************************************************/

		* Pooled

			eststo clear

			* One stored model per error-type variable
			foreach errtype of varlist aadhaaret_fm_* {
				eststo: estpost svy: tabulate `errtype', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				* Don't know (.d) and refused (.r) answers feed the "missing" footer row
				count if (`errtype' == .r | `errtype' == .d)
				estadd scalar missing = r(N)
				* Number of .e values, stored as "er" (not listed in stats() below)
				count if `errtype' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.13.1 Types of errors in Aadhaar (among residents whose Aadhaar had errors; numbers in percentage) [All three states]") ///
				mtitles("Name" "Address" "Date of birth") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who stated that they have an error in their Aadhaar.")

			eststo clear
				
		* By state
		* Tabulates each aadhaaret_fm_ variable within one state at a time and appends
		* one table per state (2.13.2, 2.13.3, 2.13.4) to 2_Data_Quality.rtf.
			
			eststo clear
			* NOTE(review): IDs is not referenced in the visible part of this file;
			* kept in case code further down reads it.
			local IDs aadhaaret_fm_1 aadhaaret_fm_2 aadhaaret_fm_3
				
			* Positional macros 1, 2 and 3 hold the state names printed in the titles
			tokenize `" "Andhra Pradesh" "Rajasthan" "West Bengal" "'
			forvalues i = 1/3 {
				* One stored model per error-type variable, restricted to state i
				foreach var of varlist aadhaaret_fm_* {
					display `i'
					eststo: estpost svy: tab `var' if state == `i', percent nototal ci
					estadd matrix cil = e(lb)
					estadd matrix ciu = e(ub)
					* Don't know (.d) and refused (.r) answers feed the "missing" footer row
					count if (`var' ==.d | `var' ==.r) & state == `i'
					estadd scalar missing  = r(N)
					* Number of .e values, stored as "er" (not listed in stats() below)
					count if (`var' ==.e) & state == `i'
					estadd scalar er  = r(N)
				}

				* Table suffix: the pooled table is 2.13.1, so state i becomes 2.13.(i+1).
				* The title looks the state name up through the loop index without
				* incrementing it, so the index is never altered inside the loop.
				local k = `i' + 1
				esttab using "2_Data_Quality.rtf", ///
					compress ///
					collabels(none) ///
					eqlabels(none) ///
					label ///
					modelwidth(0) ///
					incelldelimiter(-) ///
					cells(b(fmt(1)) "cil & ciu") ///
					title ("Table 2.13.`k' Types of errors in Aadhaar (among residents whose Aadhaar had errors; numbers in percentage) [State: ``i'']") ///
					nostar ///
					nonumbers ///
					mtitles ("Name" "Address" "Date of birth") ///
					nogaps ///
					stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
					nonotes ///
					addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who stated that they have an error in their Aadhaar.") ///
					append

				* Start the next state with an empty set of stored estimates
				eststo clear
			}
			
		/*****************************************************************************
		Q6	Reason for error
		* Types of analysis: Proportion  errorwhy
		* Output: Table 2.14, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		* Row labels for codes 1, 2, 100 and 101 are set through coeflabels().
		*****************************************************************************/

			eststo clear
			local outcome errorwhy

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.14 (Self-reported) reasons for errors in Aadhaar (among those who had an error in their Aadhaar)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				coeflabels(1 "Data entry error at the center/camp" 2 "Errors in other IDs submitted" 100 "I made a mistake in giving my details" 101 "Child did not have a name") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates." "This question was asked about all household members of the main respondent who stated that they have an error in their Aadhaar.")

			eststo clear
			
		/*****************************************************************************
		Q11	Did you have any errors in your voter ID card?
		* Types of analysis: Proportion  voterid_errors
		* Output: Table 2.15, appended to 2_Data_Quality.rtf.
		* Columns: pooled sample, then state == 1, 2, 3.
		*****************************************************************************/

			eststo clear
			local outcome voterid_errors

			* Pooled column: survey-weighted percentages; CI bounds stored as cil / ciu
			eststo: estpost svy: tabulate `outcome', percent nototal ci
			estadd matrix cil = e(lb)
			estadd matrix ciu = e(ub)
			* Don't know (.d) and refused (.r) answers feed the "missing" footer row
			count if (`outcome' == .r | `outcome' == .d)
			estadd scalar missing = r(N)
			* Number of .e values, stored as "er" (not listed in stats() below)
			count if `outcome' == .e
			estadd scalar er = r(N)

			* One further column per state
			foreach st of numlist 1/3 {
				display `st'
				eststo: estpost svy: tabulate `outcome' if state == `st', percent nototal ci
				estadd matrix cil = e(lb)
				estadd matrix ciu = e(ub)
				count if state == `st' & (`outcome' == .r | `outcome' == .d)
				estadd scalar missing = r(N)
				count if state == `st' & `outcome' == .e
				estadd scalar er = r(N)
			}

			esttab using "2_Data_Quality.rtf", ///
				append ///
				title("Table 2.15 Percentage of residents who had errors in their voter ID (among those who have voter ID)") ///
				mtitles("All three states" "Andhra Pradesh" "Rajasthan" "West Bengal") ///
				cells(b(fmt(1)) "cil & ciu") ///
				incelldelimiter(-) ///
				stats(N missing, fmt(0) label("Number of observations" "Number of missing observations (don't know / refused)")) ///
				collabels(none) eqlabels(none) label ///
				compress modelwidth(0) nogaps ///
				nostar nonumbers ///
				nonotes ///
				addnotes("Notes: 95% confidence intervals are under point estimates.")

			eststo clear
	
	
	***  Regressions / Hypothesis tests
	
		** Errors: By Caste, Gender, Education, Religion **
			
			* Pooled
			* Runs one survey-weighted regression of aadhaarerror_fm per entry of the
			* regressors list (the first entry is the pair sc_cat st_cat) and appends
			* the stored models to 2_Data_Quality.rtf as Table 2.16.1.
			
				local option append
				eststo clear
				local regressors `" "sc_cat st_cat" rel_muslim female_member member_noschool member_above60"'
				local j = 1
				foreach rhs in `regressors' {

					* A first, silent fit only serves to flag the estimation sample
					quietly svy: regress aadhaarerror_fm `rhs'
					generate sample = e(sample)

					* Counts taken on the full data, before it is cut to the sample
					count if (aadhaarerror_fm == .r | aadhaarerror_fm == .d)
					local miss = r(N)
					count if aadhaarerror_fm == .e
					local errors = r(N)

					preserve
					keep if sample == 1

					* Regressor mean(s); only the first specification has two regressors
					svy: mean `rhs'
					if `j' == 1 {
						local x1 = _b[sc_cat]
						local x2 = _b[st_cat]
					}
					else {
						local x1 = _b[`rhs']
						local x2 = .
					}
					local ++j

					* Mean of the dependent variable, shown in the table footer as y
					svy: mean aadhaarerror_fm
					local y1 = _b[aadhaarerror_fm]

					* The fit that is actually stored, with the extra scalars attached
					eststo: svy: regress aadhaarerror_fm `rhs'
					estadd scalar y = `y1'
					estadd scalar x = `x1'
					estadd scalar z = `x2'
					estadd scalar missing = `miss'
					estadd scalar errors = `errors'

					restore
					drop sample
				}

				* Only N, r2 and y are printed; the remaining scalars stay in the stored results
				esttab using "2_Data_Quality.rtf", ///
					compress ///
					eqlabels(none) ///
					label ///
					title ("Table 2.16.1 Hypothesis tests of differences in the likelihood of having error in their Aadhaar among members of different vulnerable communities [All three states]") ///
					coeflabels (sc_cat "SC household member" st_cat "ST household member" female_member "Female household member" rel_muslim "Muslim household member" member_noschool "(Adult) household member has not attended school" member_above60 "(Adult) household member above age 60") ///
					mtitles ("Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar") ///
					p ///
					star (* 0.1 ** 0.05 *** 0.01) ///
					lines ///
					b (3) ///
					p (2) ///
					nogaps ///
					stats(N r2 y, fmt(0 3 3) label("Number of observations" "R-squared" "Mean of dependent variable")) ///
					nonotes ///
					addnotes("Notes: p-values in parentheses, with ***, **, * indicating significance at 1, 5 and 10%. No correction for multiple hypothesis testing has been applied to the results in the table." ///
					"We test the null hypotheses that there are no differences in the likelihood of having error in their Aadhaar between vulnerable respondents and other respondents, with vulnerability being proxied by each of the categories above. Each column presents coefficients from a regression of the outcome variable on a dummy variable for the corresponding category and a constant. Hence we separately examine whether each individual type above has a different likelihood of having error in their Aadhaar compared to all other individuals (i.e. all those not in the specified type).") ///
					`option'
					local option append

				eststo clear
			
			* By state
			*
			* Builds Tables 2.16.2 to 2.16.4: the same regressions as the pooled table,
			* estimated separately for state equal to 1, 2 and 3. The table number is the
			* state code plus one. The state name in the title comes from the positional
			* macros 1, 2 and 3 set by tokenize.
			* NOTE(review): assumes state is coded 1 = Andhra Pradesh, 2 = Rajasthan,
			* 3 = West Bengal, matching the order of the labels below. Confirm against
			* the codebook.
			
				tokenize `" "Andhra Pradesh" "Rajasthan" "West Bengal" "'
				loc option append
				forv i = 1/3{
					eststo clear
					local regressors `" "sc_cat st_cat" rel_muslim female_member member_noschool member_above60"'
					local j = 1
					foreach var in `regressors'{
						* Pass 1: only used to flag the estimation sample within this state
						qui svy: regress aadhaarerror_fm `var' if state == `i'
						* A temporary variable avoids a name clash with any existing variable
						tempvar insample
						gen byte `insample' = e(sample)
						count if (aadhaarerror_fm ==.d | aadhaarerror_fm ==.r) & state == `i'
						loc miss = r(N)
						count if (aadhaarerror_fm ==.e) & state == `i'
						loc errors = r(N)
						
						preserve
						* The flag is 1 only for observations of this state used in pass 1,
						* so no further state condition is needed below
						keep if `insample' == 1
						svy: mean `var'
						* j is 1 only for the first set, which is the SC and ST pair
						if `j++'==1{
							loc x1 = _b[sc_cat] 
							loc x2 = _b[st_cat]
						}
						else{
							loc x1 = _b[`var']
							loc x2 = .
						}
						svy: mean aadhaarerror_fm
						loc y1 = _b[aadhaarerror_fm]
						* Pass 2: same regression on the restricted data, stored for esttab
						eststo: svy: regress aadhaarerror_fm `var'
						estadd scalar missing = `miss'
						estadd scalar errors = `errors'
						estadd scalar y = `y1'
						estadd scalar x = `x1'
						estadd scalar z = `x2'
						restore
						drop `insample'
					}
					local k = `i' + 1
					* The state label is looked up by nested expansion of the loop counter.
					* The counter itself is left untouched inside the loop body.
					esttab using "2_Data_Quality.rtf", ///
						compress ///
						eqlabels(none) ///
						label ///
						title ("Table 2.16.`k' Hypothesis tests of differences in the likelihood of having error in their Aadhaar among members of different vulnerable communities [State: ``i'']") ///	
						coeflabels (sc_cat "SC household member" st_cat "ST household member" female_member "Female household member" rel_muslim "Muslim household member" member_noschool "(Adult) household member has not attended school" member_above60 "(Adult) household member above age 60") ///
						mtitles ("Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar" "Has error in Aadhaar") ///
						p ///
						star (* 0.1 ** 0.05 *** 0.01) ///
						lines ///
						b (3) ///
						p (2) ///
						nogaps ///
						stats(N r2 y, fmt(0 3 3) label("Number of observations" "R-squared" "Mean of dependent variable")) ///
						nonotes ///
						addnotes("Notes: p-values in parentheses, with ***, **, * indicating significance at 1, 5 and 10%. No correction for multiple hypothesis testing has been applied to the results in the table." ///
						"See footnote to Table 2.16.1 for a description of the hypotheses tested here.")	///
						`option'
				}		
				eststo clear
